{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cluster Likert Questions"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Original survey data:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "respondent_id",
"rawType": "int64",
"type": "integer"
},
{
"name": "q1_ease_of_use",
"rawType": "object",
"type": "string"
},
{
"name": "q2_product_quality",
"rawType": "object",
"type": "string"
},
{
"name": "q3_value_for_money",
"rawType": "object",
"type": "string"
},
{
"name": "q4_customer_service",
"rawType": "object",
"type": "string"
},
{
"name": "q5_would_recommend",
"rawType": "object",
"type": "string"
},
{
"name": "q6_meets_expectations",
"rawType": "object",
"type": "string"
},
{
"name": "q7_better_than_competitors",
"rawType": "object",
"type": "string"
},
{
"name": "q8_overall_satisfaction",
"rawType": "object",
"type": "string"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "1e525443-79e3-4d45-87f2-80f1812057d5",
"rows": [
[
"0",
"1",
"Agree",
"Strongly Agree",
"Strongly Agree",
"Agree",
"Strongly Agree",
"Neither Agree nor Disagree",
"Neither Agree nor Disagree",
"Agree"
],
[
"1",
"2",
"Strongly Agree",
"Strongly Agree",
"Strongly Agree",
"Agree",
"Agree",
"Strongly Agree",
"Strongly Agree",
"Agree"
],
[
"2",
"3",
"Strongly Agree",
"Neither Agree nor Disagree",
"Agree",
"Neither Agree nor Disagree",
"Strongly Agree",
"Agree",
"Strongly Agree",
"Strongly Agree"
],
[
"3",
"4",
"Agree",
"Agree",
"Strongly Agree",
"Agree",
"Strongly Agree",
"Strongly Agree",
"Strongly Agree",
"Agree"
],
[
"4",
"5",
"Agree",
"Strongly Agree",
"Agree",
"Agree",
"Strongly Agree",
"Agree",
"Strongly Agree",
"Agree"
]
],
"shape": {
"columns": 9,
"rows": 5
}
},
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" respondent_id | \n",
" q1_ease_of_use | \n",
" q2_product_quality | \n",
" q3_value_for_money | \n",
" q4_customer_service | \n",
" q5_would_recommend | \n",
" q6_meets_expectations | \n",
" q7_better_than_competitors | \n",
" q8_overall_satisfaction | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" Agree | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Neither Agree nor Disagree | \n",
" Neither Agree nor Disagree | \n",
" Agree | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" Strongly Agree | \n",
" Neither Agree nor Disagree | \n",
" Agree | \n",
" Neither Agree nor Disagree | \n",
" Strongly Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
" Strongly Agree | \n",
" Agree | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" respondent_id q1_ease_of_use q2_product_quality \\\n",
"0 1 Agree Strongly Agree \n",
"1 2 Strongly Agree Strongly Agree \n",
"2 3 Strongly Agree Neither Agree nor Disagree \n",
"3 4 Agree Agree \n",
"4 5 Agree Strongly Agree \n",
"\n",
" q3_value_for_money q4_customer_service q5_would_recommend \\\n",
"0 Strongly Agree Agree Strongly Agree \n",
"1 Strongly Agree Agree Agree \n",
"2 Agree Neither Agree nor Disagree Strongly Agree \n",
"3 Strongly Agree Agree Strongly Agree \n",
"4 Agree Agree Strongly Agree \n",
"\n",
" q6_meets_expectations q7_better_than_competitors \\\n",
"0 Neither Agree nor Disagree Neither Agree nor Disagree \n",
"1 Strongly Agree Strongly Agree \n",
"2 Agree Strongly Agree \n",
"3 Strongly Agree Strongly Agree \n",
"4 Agree Strongly Agree \n",
"\n",
" q8_overall_satisfaction \n",
"0 Agree \n",
"1 Agree \n",
"2 Strongly Agree \n",
"3 Agree \n",
"4 Agree "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 03_cluster_likert_questions.ipynb\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# `nlp` is not referenced by name in this notebook.\n",
"# NOTE(review): presumably imported for its side effect of registering the DataFrame\n",
"# methods used in the next cell (e.g. `cluster_questions`) - confirm before removing.\n",
"from pandas_survey_toolkit import nlp  # noqa: F401\n",
"from pandas_survey_toolkit.vis import cluster_heatmap_plot\n",
"\n",
"# Simulate a product satisfaction survey: POPULATION respondents, 8 Likert questions.\n",
"\n",
"# Question columns of the simulated survey\n",
"questions = [\n",
"    'q1_ease_of_use',\n",
"    'q2_product_quality',\n",
"    'q3_value_for_money',\n",
"    'q4_customer_service',\n",
"    'q5_would_recommend',\n",
"    'q6_meets_expectations',\n",
"    'q7_better_than_competitors',\n",
"    'q8_overall_satisfaction'\n",
"]\n",
"\n",
"# Likert scale options, ordered from most negative to most positive\n",
"# (the slices below rely on this order)\n",
"likert_options = [\n",
"    'Strongly Disagree',\n",
"    'Disagree',\n",
"    'Neither Agree nor Disagree',\n",
"    'Agree',\n",
"    'Strongly Agree'\n",
"]\n",
"\n",
"# Number of simulated respondents\n",
"POPULATION = 200\n",
"\n",
"# Seed the global NumPy RNG so the simulated answers are reproducible\n",
"np.random.seed(42)\n",
"\n",
"# Respondent ids run from 1 to POPULATION inclusive.\n",
"# (range(1, POPULATION) would stop one short and yield only POPULATION - 1 rows.)\n",
"respondent_ids = range(1, POPULATION + 1)\n",
"data = {'respondent_id': respondent_ids}\n",
"\n",
"# Generate random Likert responses with three built-in answer profiles,\n",
"# assigned by respondent id:\n",
"#   first 30% of respondents : generally positive\n",
"#   next 30% of respondents  : generally negative\n",
"#   remaining 40%            : mostly neutral (\"don't care\")\n",
"positive_cutoff = 0.3 * POPULATION\n",
"negative_cutoff = 0.6 * POPULATION\n",
"\n",
"for q in questions:\n",
"    responses = []\n",
"    for i in respondent_ids:\n",
"        if i <= positive_cutoff:  # Positive group\n",
"            responses.append(np.random.choice(likert_options[2:], p=[0.1, 0.5, 0.4]))\n",
"        elif i <= negative_cutoff:  # Negative group\n",
"            responses.append(np.random.choice(likert_options[:3], p=[0.3, 0.5, 0.2]))\n",
"        else:  # Don't care group\n",
"            responses.append(np.random.choice(likert_options[1:4], p=[0.1, 0.8, 0.1]))\n",
"    data[q] = responses\n",
"\n",
"# Create DataFrame: one row per respondent, one column per question\n",
"df = pd.DataFrame(data)\n",
"\n",
"# Display the original data\n",
"print(\"Original survey data:\")\n",
"display(df.head())\n",
"\n",
"# Optional custom mapping for Likert scale values (keys are lowercase).\n",
"# It collapses the strong and plain variants onto the same -1 / 0 / +1 codes.\n",
"# It is not used unless passed as `likert_mapping=custom_mapping` to `cluster_questions`.\n",
"custom_mapping = {\n",
"    'strongly disagree': -1,\n",
"    'disagree': -1,\n",
"    'neither agree nor disagree': 0,\n",
"    'agree': 1,\n",
"    'strongly agree': 1\n",
"}\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using default mapping:\n",
"-1: Phrases containing 'disagree', 'do not agree', etc.\n",
" 0: Phrases containing 'neutral', 'neither', 'unsure', etc.\n",
"+1: Phrases containing 'agree' (but not 'disagree' or 'not agree')\n",
"NaN: NaN values are preserved\n",
" Agree -> 1: 282 times\n",
" Strongly Agree -> 1: 199 times\n",
" Neither Agree nor Disagree -> 0: 668 times\n",
" Disagree -> -1: 293 times\n",
" Strongly Disagree -> -1: 150 times\n",
"\n",
"Encoded Likert data:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "respondent_id",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q1_ease_of_use",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q2_product_quality",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q3_value_for_money",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q4_customer_service",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q5_would_recommend",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q6_meets_expectations",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q7_better_than_competitors",
"rawType": "int64",
"type": "integer"
},
{
"name": "likert_encoded_q8_overall_satisfaction",
"rawType": "int64",
"type": "integer"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "0fb30361-1b2d-4d07-b4f8-f350bffdc906",
"rows": [
[
"0",
"1",
"1",
"1",
"1",
"1",
"1",
"0",
"0",
"1"
],
[
"1",
"2",
"1",
"1",
"1",
"1",
"1",
"1",
"1",
"1"
],
[
"2",
"3",
"1",
"0",
"1",
"0",
"1",
"1",
"1",
"1"
],
[
"3",
"4",
"1",
"1",
"1",
"1",
"1",
"1",
"1",
"1"
],
[
"4",
"5",
"1",
"1",
"1",
"1",
"1",
"1",
"1",
"1"
]
],
"shape": {
"columns": 9,
"rows": 5
}
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" respondent_id | \n",
" likert_encoded_q1_ease_of_use | \n",
" likert_encoded_q2_product_quality | \n",
" likert_encoded_q3_value_for_money | \n",
" likert_encoded_q4_customer_service | \n",
" likert_encoded_q5_would_recommend | \n",
" likert_encoded_q6_meets_expectations | \n",
" likert_encoded_q7_better_than_competitors | \n",
" likert_encoded_q8_overall_satisfaction | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" respondent_id likert_encoded_q1_ease_of_use \\\n",
"0 1 1 \n",
"1 2 1 \n",
"2 3 1 \n",
"3 4 1 \n",
"4 5 1 \n",
"\n",
" likert_encoded_q2_product_quality likert_encoded_q3_value_for_money \\\n",
"0 1 1 \n",
"1 1 1 \n",
"2 0 1 \n",
"3 1 1 \n",
"4 1 1 \n",
"\n",
" likert_encoded_q4_customer_service likert_encoded_q5_would_recommend \\\n",
"0 1 1 \n",
"1 1 1 \n",
"2 0 1 \n",
"3 1 1 \n",
"4 1 1 \n",
"\n",
" likert_encoded_q6_meets_expectations \\\n",
"0 0 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"\n",
" likert_encoded_q7_better_than_competitors \\\n",
"0 0 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 \n",
"\n",
" likert_encoded_q8_overall_satisfaction \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Question clustering results:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "respondent_id",
"rawType": "int64",
"type": "integer"
},
{
"name": "question_cluster_id",
"rawType": "float64",
"type": "float"
},
{
"name": "question_cluster_probability",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "254dc5ba-94ba-4684-99a5-a022533a00b8",
"rows": [
[
"0",
"1",
"0.0",
"0.5702343598374491"
],
[
"1",
"2",
"0.0",
"1.0"
],
[
"2",
"3",
"0.0",
"0.5627238816993246"
],
[
"3",
"4",
"0.0",
"1.0"
],
[
"4",
"5",
"0.0",
"1.0"
]
],
"shape": {
"columns": 3,
"rows": 5
}
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" respondent_id | \n",
" question_cluster_id | \n",
" question_cluster_probability | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1 | \n",
" 0.0 | \n",
" 0.570234 | \n",
"
\n",
" \n",
" | 1 | \n",
" 2 | \n",
" 0.0 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" | 2 | \n",
" 3 | \n",
" 0.0 | \n",
" 0.562724 | \n",
"
\n",
" \n",
" | 3 | \n",
" 4 | \n",
" 0.0 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" | 4 | \n",
" 5 | \n",
" 0.0 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" respondent_id question_cluster_id question_cluster_probability\n",
"0 1 0.0 0.570234\n",
"1 2 0.0 1.000000\n",
"2 3 0.0 0.562724\n",
"3 4 0.0 1.000000\n",
"4 5 0.0 1.000000"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Cluster heatmap showing the sentiment distribution across questions:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"\n",
""
],
"text/plain": [
"alt.VConcatChart(...)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Cluster averages for each question:\n"
]
},
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "question_cluster_id",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q1_ease_of_use",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q2_product_quality",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q3_value_for_money",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q4_customer_service",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q5_would_recommend",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q6_meets_expectations",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q7_better_than_competitors",
"rawType": "float64",
"type": "float"
},
{
"name": "likert_encoded_q8_overall_satisfaction",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "8df84408-1730-41d7-b372-6aca72ddf886",
"rows": [
[
"0.0",
"0.8524590163934426",
"0.8852459016393442",
"0.819672131147541",
"0.8688524590163934",
"0.9180327868852459",
"0.8524590163934426",
"0.8852459016393442",
"0.8852459016393442"
],
[
"1.0",
"0.04081632653061224",
"-0.08163265306122448",
"0.12244897959183673",
"0.08163265306122448",
"0.08163265306122448",
"-0.04081632653061224",
"0.1836734693877551",
"0.0"
],
[
"2.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0",
"0.0"
],
[
"3.0",
"-0.6805555555555556",
"-0.6944444444444444",
"-0.5694444444444444",
"-0.7083333333333334",
"-0.8333333333333334",
"-0.6666666666666666",
"-0.7777777777777778",
"-0.7083333333333334"
]
],
"shape": {
"columns": 8,
"rows": 4
}
},
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" likert_encoded_q1_ease_of_use | \n",
" likert_encoded_q2_product_quality | \n",
" likert_encoded_q3_value_for_money | \n",
" likert_encoded_q4_customer_service | \n",
" likert_encoded_q5_would_recommend | \n",
" likert_encoded_q6_meets_expectations | \n",
" likert_encoded_q7_better_than_competitors | \n",
" likert_encoded_q8_overall_satisfaction | \n",
"
\n",
" \n",
" | question_cluster_id | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0.0 | \n",
" 0.852459 | \n",
" 0.885246 | \n",
" 0.819672 | \n",
" 0.868852 | \n",
" 0.918033 | \n",
" 0.852459 | \n",
" 0.885246 | \n",
" 0.885246 | \n",
"
\n",
" \n",
" | 1.0 | \n",
" 0.040816 | \n",
" -0.081633 | \n",
" 0.122449 | \n",
" 0.081633 | \n",
" 0.081633 | \n",
" -0.040816 | \n",
" 0.183673 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 2.0 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" | 3.0 | \n",
" -0.680556 | \n",
" -0.694444 | \n",
" -0.569444 | \n",
" -0.708333 | \n",
" -0.833333 | \n",
" -0.666667 | \n",
" -0.777778 | \n",
" -0.708333 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" likert_encoded_q1_ease_of_use \\\n",
"question_cluster_id \n",
"0.0 0.852459 \n",
"1.0 0.040816 \n",
"2.0 0.000000 \n",
"3.0 -0.680556 \n",
"\n",
" likert_encoded_q2_product_quality \\\n",
"question_cluster_id \n",
"0.0 0.885246 \n",
"1.0 -0.081633 \n",
"2.0 0.000000 \n",
"3.0 -0.694444 \n",
"\n",
" likert_encoded_q3_value_for_money \\\n",
"question_cluster_id \n",
"0.0 0.819672 \n",
"1.0 0.122449 \n",
"2.0 0.000000 \n",
"3.0 -0.569444 \n",
"\n",
" likert_encoded_q4_customer_service \\\n",
"question_cluster_id \n",
"0.0 0.868852 \n",
"1.0 0.081633 \n",
"2.0 0.000000 \n",
"3.0 -0.708333 \n",
"\n",
" likert_encoded_q5_would_recommend \\\n",
"question_cluster_id \n",
"0.0 0.918033 \n",
"1.0 0.081633 \n",
"2.0 0.000000 \n",
"3.0 -0.833333 \n",
"\n",
" likert_encoded_q6_meets_expectations \\\n",
"question_cluster_id \n",
"0.0 0.852459 \n",
"1.0 -0.040816 \n",
"2.0 0.000000 \n",
"3.0 -0.666667 \n",
"\n",
" likert_encoded_q7_better_than_competitors \\\n",
"question_cluster_id \n",
"0.0 0.885246 \n",
"1.0 0.183673 \n",
"2.0 0.000000 \n",
"3.0 -0.777778 \n",
"\n",
" likert_encoded_q8_overall_satisfaction \n",
"question_cluster_id \n",
"0.0 0.885246 \n",
"1.0 0.000000 \n",
"2.0 0.000000 \n",
"3.0 -0.708333 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Number of respondents in each cluster:\n"
]
},
{
"data": {
"text/plain": [
"question_cluster_id\n",
"0.0 61\n",
"1.0 49\n",
"2.0 17\n",
"3.0 72\n",
"Name: count, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"# Encode the Likert answers and assign every respondent row a cluster.\n",
"# The library's default Likert mapping is used here; pass\n",
"# `likert_mapping=custom_mapping` to override it.\n",
"# NOTE(review): the parameter names suggest a UMAP + HDBSCAN pipeline - see the\n",
"# `cluster_questions` documentation for the exact meaning of each setting.\n",
"df_processed = df.cluster_questions(\n",
"    columns=questions,\n",
"    umap_n_neighbors=15,\n",
"    hdbscan_min_cluster_size=15,\n",
"    cluster_selection_epsilon=0.35,\n",
")\n",
"\n",
"# Names of the encoded Likert columns (question name with a `likert_encoded_` prefix)\n",
"likert_columns_with_prefix = [\"likert_encoded_\" + q for q in questions]\n",
"\n",
"# Preview: encoded answers\n",
"print(\"\\nEncoded Likert data:\")\n",
"encoded_preview_columns = ['respondent_id', *likert_columns_with_prefix]\n",
"display(df_processed[encoded_preview_columns].head())\n",
"\n",
"# Preview: cluster assignment and membership probability per respondent\n",
"print(\"\\nQuestion clustering results:\")\n",
"cluster_result_columns = ['respondent_id', 'question_cluster_id', 'question_cluster_probability']\n",
"display(df_processed[cluster_result_columns].head())\n",
"\n",
"# Heatmap of the encoded answers per cluster:\n",
"# cluster ids on the x-axis, encoded Likert columns as the analysed variables\n",
"print(\"\\nCluster heatmap showing the sentiment distribution across questions:\")\n",
"heatmap = cluster_heatmap_plot(\n",
"    df=df_processed,\n",
"    x=\"question_cluster_id\",\n",
"    y=likert_columns_with_prefix,\n",
"    max_width=30,  # chosen for readability\n",
")\n",
"display(heatmap)\n",
"\n",
"# Mean encoded answer per question within each cluster, as a simple interpretation aid\n",
"cluster_summary = (\n",
"    df_processed\n",
"    .groupby('question_cluster_id')[likert_columns_with_prefix]\n",
"    .mean()\n",
")\n",
"print(\"\\nCluster averages for each question:\")\n",
"display(cluster_summary)\n",
"\n",
"# Respondent counts per cluster, ordered by cluster id\n",
"cluster_sizes = df_processed['question_cluster_id'].value_counts()\n",
"cluster_counts = cluster_sizes.sort_index()\n",
"print(\"\\nNumber of respondents in each cluster:\")\n",
"display(cluster_counts)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}